package org.openedit.entermedia.search; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.Version; import org.openedit.data.Searcher; import org.openedit.data.lucene.BaseLuceneSearcher; import org.openedit.data.lucene.CompositeAnalyzer; import org.openedit.data.lucene.FullTextAnalyzer; import org.openedit.data.lucene.NullAnalyzer; import org.openedit.data.lucene.RecordLookUpAnalyzer; import org.openedit.entermedia.Category; import org.openedit.entermedia.CategoryArchive; import org.openedit.entermedia.MediaArchive; import com.openedit.OpenEditException; import com.openedit.WebPageRequest; import com.openedit.hittracker.HitTracker; import com.openedit.hittracker.SearchQuery; import com.openedit.hittracker.Term; /** * Thesaurus searcher for a Lucene index. * * @author jvalencia * */ public class RelatedKeywordLuceneSearcher extends BaseLuceneSearcher implements RelatedKeywordSearcher { private static Log log = LogFactory.getLog(RelatedKeywordLuceneSearcher.class); /** * Gets a list of suggestions for a specific search. This is done looking at * the "description" search term in the search query inside the "hits" page * value set by any fieldSearch method. If success, the * hits.getSearchQuery().setSuggestedSearches() method is being called. If * there's a "searcher" page value in the request, suggested terms with 0 * hits will be discarded. * * @param inReq * The web request. Needs a HitTracker "hits" page value, a * "catalogid" and a "type" request parameter. The last one * should be something like "asset", "job"or "user". * @return Nothing. * @throws OpenEditException */ public Map<String, String> getSuggestions(HitTracker inTracker, Searcher inTypeSearcher) throws Exception { if (inTracker == null || inTypeSearcher == null) { log.error("A tracker and a searcher are needed in order to get suggestions."); return null; } SearchQuery typeQuery = inTracker.getSearchQuery(); if (typeQuery == null) { return null; } Map<String, String> suggestions = new Hashtable<String, String>(); Term keyword = null; for ( Object o: typeQuery.getTerms() ) { Term term = (Term) o; if ("description".equals(term.getDetail().getId())) { keyword = term; break; } } if (keyword != null) { SearchQuery suggestionsQuery = createSearchQuery(); List<String> keywordValueList = new ArrayList<String>(); String keywordValue = keyword.getValue(); Object[] keywordValues = keyword.getValues(); if(keywordValue == null && keywordValues == null) { return suggestions; } // Get keyword values if(keywordValue == null) { for(int i=0; i < keywordValues.length;++i) { keywordValueList.add((String)keywordValues[i]); } } else { keywordValueList.add(keywordValue); } String nospace = ""; // Remove spaces and add to suggestion query for (int i=0; i < keywordValueList.size();++i) { nospace = keywordValueList.get(i).replace(' ', '_'); if (nospace.contains("*")) //* messes up our logic { return suggestions; } //word is a cached version of results suggestionsQuery.addMatches("word", nospace); } HitTracker wordsHits = search(suggestionsQuery); if (wordsHits == null || wordsHits.size() == 0) { indexWord(keyword.getValue(), inTracker, inTypeSearcher); wordsHits = search(suggestionsQuery); } if (wordsHits.size() > 0) { Object row = wordsHits.get(0); /* Check for timestamp */ String stamp = wordsHits.getValue(row, "timestamp"); GregorianCalendar timestamp = new GregorianCalendar(); timestamp.setTime(DateTools.stringToDate(stamp)); GregorianCalendar yesterday = new GregorianCalendar(); yesterday.add(Calendar.DATE, -1); if (timestamp.before(yesterday)) { /* Reindex */ getIndexWriter().deleteDocuments(new org.apache.lucene.index.Term("timestamp", stamp)); indexWord(keyword.getValue(), inTracker, inTypeSearcher); wordsHits = search(suggestionsQuery); if (wordsHits.size() > 0) { row = wordsHits.get(0); } } String text = wordsHits.getValue(row, "synonyms"); if (text != null) { String[] hits = text.split(";"); for (int i = 0; i < hits.length; i++) { String word = hits[i]; int index = word.lastIndexOf('('); if( index > -1) { String key = word.substring(0, index); suggestions.put(key, word); } } } } } if (suggestions.size() > 0) { inTracker.getSearchQuery().setSuggestedSearches(suggestions); } return suggestions; } public void indexWord( String inWord, HitTracker inResults, Searcher inTypeSearcher ) throws Exception { if (inWord == null || inWord.equals("")) { return; } HashSet<String> terms = new HashSet<String>(); int count = 0; for( Object o: inResults ) { count++; if( count > 50) { break; //Dont look over the entire result set } String keywords = inResults.getValue(o, "keywords"); if( keywords != null ) { for( String keyword: keywords.split(" ") ) { keyword = keyword.trim(); if( keyword.length() > 1 && !keyword.equals(inWord) ) { terms.add(keyword); } } } if(terms.size() > 9) break; } //Now check for categories? count = 0; if( terms.size() < 9) { for( Object o: inResults ) { count++; if( count > 50) { break; //Dont look over the entire result set } String catalogid = inResults.getValue(o, "catalogid"); String categoryid = inResults.getValue(o, "category"); if( catalogid != null && categoryid != null && !"index".equals(categoryid ) ) { CategoryArchive archive = getMediaArchive(catalogid).getCategoryArchive(); for( String keyword: categoryid.split(" ") ) { keyword = keyword.trim(); if( keyword.length() > 0 && !keyword.equals(inWord) ) { Category cat = archive.getCategory(keyword); if( cat != null ) { keyword = cat.getName(); terms.add(keyword); } } } } if(terms.size() > 9) break; } } Document doc = new Document(); StringBuffer saved = new StringBuffer(); StringBuffer savedenc = new StringBuffer(); //Find out how many asset hits exists for (String synonym: terms) { SearchQuery typeQuery = inTypeSearcher.createSearchQuery(); synonym = synonym.replaceAll("\\(.*?\\)", ""); synonym = synonym.replace("(", "").replace(")", "").replace("-", ""); typeQuery.addStartsWith("description", synonym); typeQuery.setHitsName("relatedkeywords"); try { int hits = inTypeSearcher.search(typeQuery).getTotal(); if (hits > 1) { saved.append(synonym); saved.append(" ("); saved.append(hits); saved.append(")"); saved.append(";"); synonym = synonym.replace(' ', '_').replace(";", " "); savedenc.append(synonym); savedenc.append(" "); } } catch (Exception ex) { log.error(ex); } } // Need to make sure that the terms they actually searched for got // into the index if (saved.length() > 0) { doc.add(new Field("synonyms", saved.toString(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); } doc.add(new Field("synonymsenc", savedenc.toString(), Store.NO, Index.ANALYZED_NO_NORMS)); doc.add(new Field("word", inWord.replace(" ", "_"), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); /* Timestamp */ String timestamp = DateTools.dateToString(new Date(), Resolution.SECOND); doc.add(new Field("timestamp", timestamp, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); getIndexWriter().addDocument(doc, getAnalyzer()); clearIndex(); } public Analyzer getAnalyzer() { if (fieldAnalyzer == null) { Map map = new HashMap(); map.put("synonymsenc", new FullTextAnalyzer(Version.LUCENE_41)); map.put("synonyms", new NullAnalyzer()); map.put("word", new NullAnalyzer()); PerFieldAnalyzerWrapper composite = new PerFieldAnalyzerWrapper( new RecordLookUpAnalyzer() , map); fieldAnalyzer = composite; } return fieldAnalyzer; } public HitTracker getAllHits(WebPageRequest inReq) { return null; } public void reIndexAll(IndexWriter writer, TaxonomyWriter inTaxonomyWriter) throws OpenEditException { // //do nothing // try // { // writer.setMergeFactor(100); // writer.setMaxBufferedDocs(2000); // } // catch (Exception ex) // { // throw new OpenEditException(ex); // } } protected MediaArchive getMediaArchive(String inCatalogId) { return (MediaArchive)getSearcherManager().getModuleManager().getBean(inCatalogId, "mediaArchive"); } }